package com.yc.mvc.web;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.URL;
import java.util.Iterator;

/**
 * Small jsoup-based scraper: fetches a site's listing page and prints the absolute URL of
 * every link selected from it.
 */
public class pc {

    /** Path of the listing page, appended to the site root given to {@link #dowmloadHTML}. */
    private static final String LISTING_PATH = "/offer-1";

    /** Timeout, in milliseconds, passed to jsoup when fetching the listing page. */
    private static final int FETCH_TIMEOUT_MILLIS = 5000;

    /** Anchors that are direct children of a {@code span} whose class attribute is {@code s0}. */
    private static final String LINK_SELECTOR = "span[class='s0']> a";

    /**
     * Runs the scraper against the hard-coded site root and target directory.
     *
     * @param args ignored
     * @throws IOException if the listing page cannot be fetched or parsed
     */
    public static void main(String[] args) throws IOException {
        dowmloadHTML("https://www.0734zpw.com", "D:\\爬虫图片");
    }

    /**
     * Fetches {@code webpath + "/offer-1"}, selects the links matching
     * {@code span[class='s0']> a} and prints each link's absolute URL to standard output.
     *
     * <p>Links are resolved against the fetched document's base URI, so hrefs that are
     * already absolute or that lack a leading slash still produce well-formed URLs.
     * Links whose href is missing or cannot be resolved are skipped.
     *
     * @param webpath site root without a trailing slash, e.g. {@code https://example.com}
     * @param dir     target directory; currently unused because nothing is written to disk yet
     * @throws IOException if the URL is malformed or the page cannot be fetched or parsed
     */
    public static void dowmloadHTML(String webpath, String dir) throws IOException {
        System.out.println("开始下载:" + webpath);

        // Fetch and parse the listing page.
        final URL url = new URL(webpath + LISTING_PATH);
        final Document doc = Jsoup.parse(url, FETCH_TIMEOUT_MILLIS);

        for (Element link : doc.select(LINK_SELECTOR)) {
            // absUrl yields an empty string when no absolute URL can be built from the href.
            final String target = link.absUrl("href");
            if (target.isEmpty()) {
                continue;
            }
            // TODO: download each target into `dir`; for now the resolved URL is only printed.
            System.out.println(target);
        }
    }

}
